* Session setup: never pause output for the more prompt.
set more off
*log using input_maker.log, replace

* Empty memory, then read in the person-level panel.
clear
use "/home/cramer/lpd-jpjr.dta"

* Discard records whose lfs code is -1 (records with missing lfs stay).
keep if lfs != -1

* Restrict the sample to October 2001 onward.
drop if mdate < tm(2001m10)

* Retain matched records, plus every record with mis equal to 1 or 5.
* NOTE(review): mis is presumably the month-in-sample index; confirm.
keep if match == 1 | inlist(mis, 1, 5)

* Unemployment duration expressed in months: udur divided by 4.01 and
* rounded up. Only defined for non-missing, non-negative udur.
* NOTE(review): udur is presumably measured in weeks; confirm.
gen months_dur = ceil(udur/4.01) if !missing(udur) & udur>=0

* Indicators for selected duration values. These are summed later to give
* weighted counts per month.
* NOTE(review): the six and seven month flags test months_dur equal to 7 and
* 8 respectively; presumably deliberate given the ceil() rounding; confirm.
gen zero_months_dur= (months_dur ==0)

gen six_months_dur = (months_dur==7)

gen seven_months_dur = (months_dur==8)

*bys pid (mis): gen udur_un = udur[_n-1] if lfs == 3 & lfs[_n-1] == 2 & lft==6 & mis == mis[_n-1] + 1 & mis != 5

* Duration reported in the previous interview of the same person. The lag is
* taken only when the previous row is the immediately preceding mis, and
* never for mis 5.
bys pid (mis): gen months_dur_previous = months_dur[_n-1] if mis == mis[_n-1] + 1 & mis != 5 & !missing(months_dur[_n-1]) 
bys pid (mis): gen udur_previous = udur[_n-1] if mis == mis[_n-1] + 1 & mis != 5 & !missing(udur[_n-1]) & udur[_n-1]>=0

* Three-way classification of the previous interview:
*   0 = no previous duration available, 1 = previous duration of at most
*   6 months, 2 = previous duration above 6 months.
* NOTE(review): this cutoff is 6 while the flow indicators built further
* down split short and long at 7; confirm which cutoff is intended.
gen prev_unemp = 0 if missing(months_dur_previous)
replace prev_unemp = 1 if !missing(months_dur_previous) & months_dur_previous<=6
replace prev_unemp = 2 if !missing(months_dur_previous) & months_dur_previous>6

* Value label for prev_unemp. The label text had a typo and the label was
* defined but never attached to the variable; both are fixed here.
label define prev_unemp 0 "not unemployed" 1 "short-term unemployed" 2 "long-term unemployed"
label values prev_unemp prev_unemp

* Age restriction: ages 25 to 54, plus people who were 54 before, so the
* upper bound is 55 inclusive. Records with missing age are dropped as well.
drop if age<=24 | age>55
/*
gen dur0_5 = (lfs==2 & udur>=0 & udur <5 & !missing(udur))
gen dur6_14  = (lfs==2  & udur>5 & udur <=14 & !missing(udur))
gen dur15_26  = (lfs==2 &  udur>14 & udur <=26 & !missing(udur))
gen dur27_52  = (lfs==2 &   udur>26 & udur<=52 & !missing(udur))
gen dur53plus  = (lfs==2 &   udur>52 & !missing(udur))

label define dur_to_e 1 "0 to 5" 2 "5 to 14" 3 "15 to 26" 4 "26-52" 5 "53+"

**label values dur_to_e dur_to_e

gen dur0_5_to_n = (lft==6 & udur_un>=0 & udur_un <5 & !missing(udur_un))
gen dur6_14_to_n = (lft==6 & udur_un>5 & udur_un <=14 & !missing(udur_un))
gen dur15_26_to_n = (lft==6 & udur_un>14 & udur_un <=26 & !missing(udur_un))
gen dur27_52_to_n = (lft==6 & udur_un>26 & udur_un<=52 & !missing(udur_un))
gen dur53plus_to_n = (lft==6 & udur_un>52 & !missing(udur_un))

*label define dur_to_n 1 "0 to 5" 2 "5 to 14" 3 "15 to 26" 4 "26-52" 5 "53+"

*gen months_udur = floor(udur/4) if udur>=0 & !missing(udur)

*tab months_udur, gen(months_udurbuckets)

*tab lft, gen(flows_buckets)
*/






* Indicator variables for each labour-market flow, keyed on the lft code.
* Flows out of unemployment (lft 4, 5, 6) are split by the duration reported
* in the previous interview: short is at most 7, long is above 7.
* NOTE(review): E, U and N presumably stand for employed, unemployed and
* not in the labour force; confirm against the lft codebook.
gen EE = (lft==0 | lft==1)
gen EU = (lft==2)
gen EN = (lft==3)
gen UE_short = (lft==4 & !missing(months_dur_previous) & months_dur_previous<=7) 
gen UE_long = (lft==4 & !missing(months_dur_previous) & months_dur_previous>7)
* UU_short previously used a strict less-than 7, so U-to-U records with a
* previous duration of exactly 7 landed in neither UU_short nor UU_long and
* were missing from the short-term denominator. It now matches UE and UN.
gen UU_short = (lft==5 & !missing(months_dur_previous) & months_dur_previous<=7)
gen UU_long = (lft==5 & !missing(months_dur_previous) & months_dur_previous>7)
gen UN_short = (lft==6 & !missing(months_dur_previous) & months_dur_previous<=7)
gen UN_long = (lft==6 & !missing(months_dur_previous) & months_dur_previous>7)
gen NE = (lft==7)
gen NU = (lft==8)
gen NN = (lft==9)

* The mis 1 and mis 5 records were retained up to this point; remove them
* before aggregating.
drop if inlist(mis, 1, 5)

* Frequency weights must be integers, so round the survey weight.
gen wt_rounded = round(sswgt)

*collapse (sum) E U N EE EU EN UE UU UN NE NU NN dur*  [fweight = round_weight], by(mdate)

* Weighted monthly totals of every flow indicator and duration flag.
* After this step the data hold one row per mdate.
collapse (sum) EE EU EN UE_* UU_* UN_* NE NU NN zero_months_dur six_months_dur seven_months_dur [fweight = wt_rounded], by(mdate)

* Display mdate as a monthly date.
format %tm mdate

*foreach i in 0_5 6_14 15_26 27_52 53plus {
*gen transitionrate_UN_`i' =  dur`i'_to_n/dur`i'[_n-1]
*}
* Calendar month (1 to 12) of each observation, used below to build the
* seasonal dummies. It is derived from the monthly date itself, so it is
* defined for every year. The earlier hard-coded list of years 2001 to 2012
* left month missing for any later date, which silently removed those
* observations from the seasonal regression.
gen month = month(dofm(mdate))

* Transition rates: each flow divided by the total of all flows that leave
* the same origin group. Every quoted list below is one origin group, and
* its members summed form the denominator.
foreach grp in "EE EU EN" "UE_short UU_short UN_short" "UE_long UU_long UN_long" "NE NU NN" {
    local denom : subinstr local grp " " " + ", all
    foreach state of local grp {
        gen transition_rate_`state' = `state'/ (`denom')
    }
}

* Ratios of transition rates, one per origin group.
* From N: rate into U relative to rate into E.
gen psi_n = transition_rate_NU / transition_rate_NE

* From E: rate into N relative to rate into U.
gen psi_e = transition_rate_EN / transition_rate_EU

* From short-duration U: rate into N relative to rate into E.
gen psi_u_short = transition_rate_UN_short / transition_rate_UE_short

* From long-duration U: rate into N relative to rate into E.
gen psi_u_long = transition_rate_UN_long / transition_rate_UE_long

* Seasonal adjustment of the psi ratios.
*
* One dummy per calendar month is created. Each psi series is regressed on
* all twelve dummies plus a constant, with the dummy coefficients constrained
* to sum to zero. The adjusted series is the raw value minus the estimated
* effect of its own month.
* NOTE(review): tab with gen() numbers the dummies by the rank of the
* distinct month values, so S_adj1 to S_adj12 line up with months 1 to 12
* only when all twelve months occur in the data.
tab month, gen(S_adj)

* Each monthly dummy enters the constraint exactly once. The previous
* version listed S_adj1 three times, so the estimated seasonal effects
* were not normalised to sum to zero.
constraint 1 S_adj1 + S_adj2 + S_adj3 + S_adj4 + S_adj5 + S_adj6 + S_adj7 + S_adj8 + S_adj9 + S_adj10 + S_adj11 + S_adj12 = 0

foreach var of varlist psi_n-psi_u_long {
    * Initialised to 0, so observations with a missing month keep 0.
    gen `var'_resids = 0
    quietly cnsreg `var' S_adj*, constraints(1)
    forvalues m = 1/12 {
        replace `var'_resids = `var' - _b[S_adj`m'] if month==`m'
    }
}

* Output for the calibration step. The three-month moving averages of the
* variables kept here are used, together with the published data and the
* thetas, to run the calibration in python.
* NOTE(review): the file name says 2002_2007, but no upper date bound is
* applied anywhere in this script; confirm the intended sample window.
keep mdate zero_months_dur six_months_dur seven_months_dur psi_n_resids psi_e_resids psi_u_short_resids psi_u_long_resids

save master_input_2002_2007.dta, replace
